Setting up

library(sparklyr)
spark_installed_versions()

Create spark connection:

sc <- spark_connect(master = "local", version = "2.4.5")
Re-using existing Spark connection to local
sc
$master
[1] "local[4]"

$method
[1] "shell"

$app_name
[1] "sparklyr"

$config
$config$spark.env.SPARK_LOCAL_IP.local
[1] "127.0.0.1"

$config$sparklyr.connect.csv.embedded
[1] "^1.*"

$config$spark.sql.legacy.utcTimestampFunc.enabled
[1] TRUE

$config$sparklyr.connect.cores.local
[1] 4

$config$spark.sql.shuffle.partitions.local
[1] 4

$config$`sparklyr.shell.driver-memory`
[1] "2g"

attr(,"config")
[1] "default"
attr(,"file")
[1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/sparklyr/conf/config-template.yml"

$state
<environment: 0x7f7f7ad3e748>

$extensions
$extensions$jars
character(0)

$extensions$packages
character(0)

$extensions$initializers
list()

$extensions$catalog_jars
character(0)

$extensions$repositories
character(0)


$spark_home
[1] "/Users/glebvulf/spark/spark-2.4.5-bin-hadoop2.7"

$backend
A connection with                              
description "->localhost:8881"
class       "sockconn"        
mode        "wb"              
text        "binary"          
opened      "opened"          
can read    "yes"             
can write   "yes"             

$monitoring
A connection with                              
description "->localhost:8881"
class       "sockconn"        
mode        "wb"              
text        "binary"          
opened      "opened"          
can read    "yes"             
can write   "yes"             

$gateway
A connection with                              
description "->localhost:8880"
class       "sockconn"        
mode        "rb"              
text        "binary"          
opened      "opened"          
can read    "yes"             
can write   "yes"             

$output_file
[1] "/var/folders/sh/klkdl4gx3llb0q9cv8nywfqh0000gn/T//RtmpxlLiu6/file9b65935ac53_spark.log"

$sessionId
[1] 694

$home_version
[1] "2.4.5"

attr(,"class")
[1] "spark_connection"       "spark_shell_connection" "DBIConnection"         
library(tidyverse)
library(janitor)
# Read the avocado data into a local tibble. readr reports one unnamed column
# (filled in as 'X1'); clean_names() then normalises the column headers.
avocado <- read_csv("avocado.csv") %>%
  clean_names()
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
  X1 = col_double(),
  Date = col_date(format = ""),
  AveragePrice = col_double(),
  `Total Volume` = col_double(),
  `4046` = col_double(),
  `4225` = col_double(),
  `4770` = col_double(),
  `Total Bags` = col_double(),
  `Small Bags` = col_double(),
  `Large Bags` = col_double(),
  `XLarge Bags` = col_double(),
  type = col_character(),
  year = col_double(),
  region = col_character()
)
# Upload the local tibble to Spark and keep a reference to the remote table.
# NOTE(review): copy_to() is called without overwrite = TRUE, so re-running
# this against a session that already holds an "avocado" table may fail —
# confirm before relying on it in a re-used connection.
avocado_spark <- copy_to(sc, avocado)
Warning in doTryCatch(return(expr), name, parentenv, handler) :
  restarting interrupted promise evaluation
# Read the CSV straight into Spark. The table name is given explicitly so the
# tbl() lookup below is guaranteed to refer to this table instead of depending
# on whatever name sparklyr assigns when `name` is omitted.
avocado_spark <- spark_read_csv(sc, name = "avocado", path = "avocado.csv")
# Lazy reference to the registered Spark table.
avocado_spark <- tbl(sc, "avocado")
# List the tables currently registered in the Spark session.
src_tbls(sc)
[1] "avocado"  "prestige"
class(avocado_spark)
[1] "tbl_spark" "tbl_sql"   "tbl_lazy"  "tbl"      
str(avocado_spark)
List of 2
 $ src:List of 1
  ..$ con:List of 13
  .. ..$ master      : chr "local[4]"
  .. ..$ method      : chr "shell"
  .. ..$ app_name    : chr "sparklyr"
  .. ..$ config      :List of 6
  .. .. ..$ spark.env.SPARK_LOCAL_IP.local           : chr "127.0.0.1"
  .. .. ..$ sparklyr.connect.csv.embedded            : chr "^1.*"
  .. .. ..$ spark.sql.legacy.utcTimestampFunc.enabled: logi TRUE
  .. .. ..$ sparklyr.connect.cores.local             : int 4
  .. .. ..$ spark.sql.shuffle.partitions.local       : int 4
  .. .. ..$ sparklyr.shell.driver-memory             : chr "2g"
  .. .. ..- attr(*, "config")= chr "default"
  .. .. ..- attr(*, "file")= chr "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/sparklyr/conf/config-template.yml"
  .. ..$ state       :<environment: 0x7f7f7ad3e748> 
  .. ..$ extensions  :List of 5
  .. .. ..$ jars        : chr(0) 
  .. .. ..$ packages    : chr(0) 
  .. .. ..$ initializers: list()
  .. .. ..$ catalog_jars: chr(0) 
  .. .. ..$ repositories: chr(0) 
  .. ..$ spark_home  : chr "/Users/glebvulf/spark/spark-2.4.5-bin-hadoop2.7"
  .. ..$ backend     : 'sockconn' int 4
  .. .. ..- attr(*, "conn_id")=<externalptr> 
  .. ..$ monitoring  : 'sockconn' int 6
  .. .. ..- attr(*, "conn_id")=<externalptr> 
  .. ..$ gateway     : 'sockconn' int 3
  .. .. ..- attr(*, "conn_id")=<externalptr> 
  .. ..$ output_file : chr "/var/folders/sh/klkdl4gx3llb0q9cv8nywfqh0000gn/T//RtmpxlLiu6/file9b65935ac53_spark.log"
  .. ..$ sessionId   : num 694
  .. ..$ home_version: chr "2.4.5"
  .. ..- attr(*, "class")= chr [1:3] "spark_connection" "spark_shell_connection" "DBIConnection"
  ..- attr(*, "class")= chr [1:3] "src_spark" "src_sql" "src"
 $ ops:List of 2
  ..$ x   : 'ident' chr "avocado"
  ..$ vars: chr [1:14] "_c0" "Date" "AveragePrice" "Total_Volume" ...
  ..- attr(*, "class")= chr [1:3] "op_base_remote" "op_base" "op"
 - attr(*, "class")= chr [1:4] "tbl_spark" "tbl_sql" "tbl_lazy" "tbl"
library(pryr)
object_size(avocado)
2.06 MB
object_size(avocado_spark)
58.2 kB
# Aggregate inside Spark and only then collect(): pulling an un-summarised
# table into R could move far too much data.
# na.rm = TRUE states explicitly that the SQL aggregate drops missing values
# and avoids dbplyr's once-per-session warning about it.
selected_avocado_spark <- avocado_spark %>%
  select(AveragePrice) %>%
  summarise(av_mean = mean(AveragePrice, na.rm = TRUE)) %>%
  collect()

# object_size(selected_avocado_spark)

# compute() stores the result of the query as a named Spark table.
avocado_wo_price <- avocado_spark %>%
  select(-AveragePrice) %>%
  compute("avocado_wo_price")

# show_query() prints the SQL generated for the pipeline.
avocado_spark %>%
  select(AveragePrice) %>%
  show_query()
<SQL>
SELECT `AveragePrice`
FROM `avocado`
# Flag rows whose average price exceeds 1.40, then print the generated SQL.
avocado_spark <- mutate(avocado_spark, high_average = AveragePrice > 1.40)
show_query(avocado_spark)
<SQL>
SELECT `_c0`, `Date`, `AveragePrice`, `Total_Volume`, `4046`, `4225`, `4770`, `Total_Bags`, `Small_Bags`, `Large_Bags`, `XLarge_Bags`, `type`, `year`, `region`, `AveragePrice` > 1.4 AS `high_average`
FROM `avocado`
# glimpse() previews the selected column and passes the lazy table on, so
# show_query() can still print the SQL for the same pipeline.
avocado_spark %>%
  select(high_average) %>%
  glimpse() %>%
  show_query()
Rows: ??
Columns: 1
Database: spark_connection
$ high_average <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
<SQL>
SELECT `AveragePrice` > 1.4 AS `high_average`
FROM `avocado`
# Add a second flag on top of the previous mutate and print the nested SQL.
result <- mutate(avocado_spark, low_average = AveragePrice < 1.0)
show_query(result)
<SQL>
SELECT `_c0`, `Date`, `AveragePrice`, `Total_Volume`, `4046`, `4225`, `4770`, `Total_Bags`, `Small_Bags`, `Large_Bags`, `XLarge_Bags`, `type`, `year`, `region`, `high_average`, `AveragePrice` < 1.0 AS `low_average`
FROM (SELECT `_c0`, `Date`, `AveragePrice`, `Total_Volume`, `4046`, `4225`, `4770`, `Total_Bags`, `Small_Bags`, `Large_Bags`, `XLarge_Bags`, `type`, `year`, `region`, `AveragePrice` > 1.4 AS `high_average`
FROM `avocado`) `dbplyr_016`
# collect() runs the query in Spark and brings the rows back into R.
collected_result <- result %>%
  collect()

# The lazy Spark table and the collected result have different classes.
class(result)
[1] "tbl_spark" "tbl_sql"   "tbl_lazy"  "tbl"      
class(collected_result)
[1] "tbl_df"     "tbl"        "data.frame"

Spark web interface

spark_web(sc)

Analysis in Spark

library(sparklyr)
sc <- spark_connect(master = "local", version = "2.4.5")
Re-using existing Spark connection to local
library(dbplot)
library(car)
# Copy the Prestige data into Spark. overwrite = TRUE keeps this chunk
# re-runnable: the Spark connection is re-used between runs, and copy_to()
# refuses to replace an already registered table unless told to.
prestige <- copy_to(sc, Prestige, overwrite = TRUE)
# Mean of every numeric column; show_query() prints the generated SQL.
prestige %>%
  summarise_if(is.numeric, mean) %>%
  show_query()
Applying predicate on the first 100 rows
<SQL>
SELECT AVG(`education`) AS `education`, AVG(`income`) AS `income`, AVG(`women`) AS `women`, AVG(`prestige`) AS `prestige`, AVG(`census`) AS `census`
FROM `Prestige`
# Variance of every numeric column; var() is rendered as var_samp() in the
# generated SQL.
prestige %>%
  summarise_if(is.numeric, var) %>%
  show_query()
Applying predicate on the first 100 rows
<SQL>
SELECT var_samp(`education`) AS `education`, var_samp(`income`) AS `income`, var_samp(`women`) AS `women`, var_samp(`prestige`) AS `prestige`, var_samp(`census`) AS `census`
FROM `Prestige`
# Mean income grouped by whether education > 7; ifelse() is rendered as a
# CASE WHEN expression in the generated SQL.
prestige %>%
  mutate(secondary_educated = ifelse(education > 7, "Yes", "No")) %>%
  group_by(secondary_educated) %>%
  summarise(mean_income = mean(income)) %>%
  show_query()
<SQL>
SELECT `secondary_educated`, AVG(`income`) AS `mean_income`
FROM (SELECT `education`, `income`, `women`, `prestige`, `census`, `type`, CASE WHEN (`education` > 7.0) THEN ("Yes") WHEN NOT(`education` > 7.0) THEN ("No") END AS `secondary_educated`
FROM `Prestige`) `dbplyr_019`
GROUP BY `secondary_educated`
prestige %>%
  select(income, education) %>%
  glimpse()
Rows: ??
Columns: 2
Database: spark_connection
$ income    <int> 12351, 25879, 9271, 8865, 8403, 11030, 8258, 14163, 11377, 11023, 59…
$ education <dbl> 13.11, 12.26, 12.77, 11.42, 14.62, 15.64, 15.09, 15.44, 14.52, 14.64…
# Matrix-style bracket indexing is not available on a Spark table: the call
# fails and the handler prints the condition instead of aborting.
tryCatch(
  prestige[, c("education", "income")] %>%
    glimpse(),
  error = function(cond) print(cond)
)
<simpleError in prestige[, c("education", "income")]: incorrect number of dimensions>
# The same bracket indexing succeeds on the local data frame `Prestige`.
tryCatch(
  {Prestige[, c("education", "income")] %>% glimpse()}, 
  error = print
)
Rows: 102
Columns: 2
$ education <dbl> 13.11, 12.26, 12.77, 11.42, 14.62, 15.64, 15.09, 15.44, 14.52, 14.64…
$ income    <int> 12351, 25879, 9271, 8865, 8403, 11030, 8258, 14163, 11377, 11023, 59…

Passthrough

# percentile(), array() and explode() are not translated by dbplyr: as the
# generated SQL shows, the calls are passed through verbatim for Spark SQL
# to evaluate.
prestige %>%
  summarise(women_percentile = percentile(women, array(0.25, 0.5, 0.75))) %>%
  mutate(women_percentile = explode(women_percentile)) %>%
  show_query()
<SQL>
SELECT explode(`women_percentile`) AS `women_percentile`
FROM (SELECT percentile(`women`, array(0.25, 0.5, 0.75)) AS `women_percentile`
FROM `Prestige`) `dbplyr_021`

Visualisation

Raster Plot

Sparklyr native interface

schema
$education
$education$name
[1] "education"

$education$type
[1] "DoubleType"


$income
$income$name
[1] "income"

$income$type
[1] "IntegerType"


$women
$women$name
[1] "women"

$women$type
[1] "DoubleType"


$prestige
$prestige$name
[1] "prestige"

$prestige$type
[1] "DoubleType"


$census
$census$name
[1] "census"

$census$type
[1] "IntegerType"


$type
$type$name
[1] "type"

$type$type
[1] "StringType"

Native functions

SDF sdf_… functions - Spark DataFrame

Sorting

Sampling

Partitioning (e.g. train/test split)

Binding

ls("package:sparklyr", pattern = "^ft" )
 [1] "ft_binarizer"                      "ft_bucketed_random_projection_lsh"
 [3] "ft_bucketizer"                     "ft_chisq_selector"                
 [5] "ft_count_vectorizer"               "ft_dct"                           
 [7] "ft_discrete_cosine_transform"      "ft_dplyr_transformer"             
 [9] "ft_elementwise_product"            "ft_feature_hasher"                
[11] "ft_hashing_tf"                     "ft_idf"                           
[13] "ft_imputer"                        "ft_index_to_string"               
[15] "ft_interaction"                    "ft_max_abs_scaler"                
[17] "ft_min_max_scaler"                 "ft_minhash_lsh"                   
[19] "ft_ngram"                          "ft_normalizer"                    
[21] "ft_one_hot_encoder"                "ft_one_hot_encoder_estimator"     
[23] "ft_pca"                            "ft_polynomial_expansion"          
[25] "ft_quantile_discretizer"           "ft_r_formula"                     
[27] "ft_regex_tokenizer"                "ft_sql_transformer"               
[29] "ft_standard_scaler"                "ft_stop_words_remover"            
[31] "ft_string_indexer"                 "ft_string_indexer_model"          
[33] "ft_tokenizer"                      "ft_vector_assembler"              
[35] "ft_vector_indexer"                 "ft_vector_slicer"                 
[37] "ft_word2vec"                      

MLlib: ft_… – Feature Transformers; ml_… – Machine Learning

Parquet format data

# Load the Parquet directory back into Spark under its own table name,
# then preview the columns.
prestige_again <- spark_read_parquet(
  sc,
  name = "prestige_again",
  path = "prestige_data"
)
glimpse(prestige_again)
Rows: ??
Columns: 6
Database: spark_connection
$ education <dbl> 13.11, 12.26, 12.77, 11.42, 14.62, 15.64, 15.09, 15.44, 14.52, 14.64…
$ income    <int> 12351, 25879, 9271, 8865, 8403, 11030, 8258, 14163, 11377, 11023, 59…
$ women     <dbl> 11.16, 4.02, 15.70, 9.11, 11.68, 5.13, 25.65, 2.69, 1.03, 0.94, 1.91…
$ prestige  <dbl> 68.8, 69.1, 63.4, 56.8, 73.5, 77.6, 72.6, 78.1, 73.1, 68.8, 62.0, 60…
$ census    <int> 1113, 1130, 1171, 1175, 2111, 2113, 2133, 2141, 2143, 2153, 2161, 21…
$ type      <chr> "prof", "prof", "prof", "prof", "prof", "prof", "prof", "prof", "pro…
glimpse(profiles)
Rows: ??
Columns: 31
Database: spark_connection
$ age         <int> 47, 27, 45, 40, 33, 27, 20, 28, 24, 34, 32, 23, 19, 45, 32, 38, 36…
$ body_type   <chr> "athletic", "full figured", NA, "athletic", "athletic", "thin", "c…
$ diet        <chr> "mostly anything", "mostly vegetarian", NA, NA, "mostly anything",…
$ drinks      <chr> "socially", "socially", "socially", "socially", "rarely", "sociall…
$ drugs       <chr> "never", NA, "never", "never", "never", "sometimes", "never", "nev…
$ education   <chr> "graduated from college/university", "graduated from college/unive…
$ essay0      <chr> NA, "still figuring out what to put here... for now i'll leave you…
$ essay1      <chr> "working in a creative industry (brand marketing agency), helping …
$ essay2      <chr> "listening to people, hearing them and working to help them. it's\…
$ essay3      <chr> "my million dollar smile (self-deprecating humor).", "my smile, my…
$ essay4      <chr> "sadly, my reading is confined to newspapers and business\nperiodi…
$ essay5      <chr> "my daughter. usa today. exercise. some travel. world news. ice\nc…
$ essay6      <chr> NA, NA, "why i fill these blocks.<br />\nwhat i really, really wan…
$ essay7      <chr> "picking up my daughter for the weekend or going to the gym and\nc…
$ essay8      <chr> "i can be a procrastinator but it is often driven by my need to\nr…
$ essay9      <chr> "you are looking for an easy going, no drama, try anything once\na…
$ ethnicity   <chr> "white", "white", "other", "white", "white", "hispanic / latin, wh…
$ height      <int> 72, 63, 66, 62, 74, 71, 68, 66, 62, 67, 61, 59, 68, 68, 67, 73, 75…
$ income      <int> -1, -1, -1, -1, -1, -1, 20000, -1, -1, -1, 70000, -1, -1, -1, -1, …
$ job         <chr> "sales / marketing / biz dev", "science / tech / engineering", "me…
$ last_online <chr> "2012-06-29-22-37", "2012-06-30-12-50", "2012-06-30-18-21", "2012-…
$ location    <chr> "san carlos, california", "oakland, california", "san francisco, c…
$ offspring   <chr> "has a kid, but doesn&rsquo;t want more", "doesn&rsquo;t have kids…
$ orientation <chr> "straight", "straight", "straight", "straight", "gay", "gay", "str…
$ pets        <chr> "likes dogs", "likes dogs", "likes dogs and likes cats", "has dogs…
$ religion    <chr> "other", "atheism but not too serious about it", "catholicism but …
$ sex         <chr> "m", "f", "m", "f", "m", "m", "f", "f", "f", "f", "f", "f", "m", "…
$ sign        <chr> "scorpio and it&rsquo;s fun to think about", "sagittarius but it d…
$ smokes      <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", …
$ speaks      <chr> "english", "english", "english (fluently)", "english", "english", …
$ status      <chr> "single", "single", "single", "single", "single", "single", "singl…
# Count missing values per column: is.na() is cast to integer and summed.
# dbplyr warns here because sum() is called without na.rm; as the warning
# says, SQL always drops missing values and na.rm = TRUE would silence it.
profiles %>%
  summarise_all(.funs = ~sum(as.integer(is.na(.))))
Missing values are always removed in SQL.
Use `SUM(x, na.rm = TRUE)` to silence this warning
This warning is displayed only once per session.
# Character columns: replace missing values with the literal "missing".
profiles_char <- profiles %>%
  select(-c(age, income, height)) %>%
  mutate_all(~ ifelse(is.na(.), "missing", .))
# Numeric columns: cast to double and treat the -1 income placeholder as
# missing. income is an integer column, so it is compared with an integer
# rather than the string "-1", which only worked through an implicit cast.
profiles_num <- profiles %>%
  select(age, income, height) %>%
  mutate(
    age = as.numeric(age),
    income = ifelse(income == -1L, NA, as.numeric(income)),
    height = as.numeric(height)
  )
# Re-attach the two halves column-wise and store the result as "profiles".
profiles <- sdf_bind_cols(profiles_char, profiles_num) %>%
  compute("profiles")

Machine Learning model

# Binary target: 1 when the person is a student, unemployed or retired,
# 0 otherwise. ("retired" was previously misspelled, so retired people were
# never flagged.)
profiles <- profiles %>%
  mutate(
    not_working = as.integer(
      ifelse(job %in% c("student", "unemployed", "retired"), 1, 0)
    )
  )

# Class balance of the target.
profiles %>%
  count(not_working)

Feature engineering

Categorical encoding: ft_string_indexer(), then ft_one_hot_encoder() to make dummy variables

# Index one string column and one-hot encode the index, adding the columns
# `<col>_indexed` and `<col>_encoded` to the Spark DataFrame.
encode_categorical <- function(sdf, col) {
  indexed_col <- paste0(col, "_indexed")
  sdf %>%
    ft_string_indexer(
      input_col = col,
      output_col = indexed_col
    ) %>%
    ft_one_hot_encoder(
      input_col = indexed_col,
      output_col = paste0(col, "_encoded")
    )
}

# Apply the same encoding to each categorical predictor in turn (same order
# as before: drinks, drugs, status), then store the result as "profiles".
profiles <- Reduce(
  encode_categorical,
  c("drinks", "drugs", "status"),
  profiles
) %>%
  compute("profiles")
glimpse(profiles)
Rows: ??
Columns: 38
Database: spark_connection
$ body_type      <chr> "athletic", "full figured", "missing", "athletic", "athletic", …
$ diet           <chr> "mostly anything", "mostly vegetarian", "missing", "missing", "…
$ drinks         <chr> "socially", "socially", "socially", "socially", "rarely", "soci…
$ drugs          <chr> "never", "missing", "never", "never", "never", "sometimes", "ne…
$ education      <chr> "graduated from college/university", "graduated from college/un…
$ essay0         <chr> "missing", "still figuring out what to put here... for now i'll…
$ essay1         <chr> "working in a creative industry (brand marketing agency), helpi…
$ essay2         <chr> "listening to people, hearing them and working to help them. it…
$ essay3         <chr> "my million dollar smile (self-deprecating humor).", "my smile,…
$ essay4         <chr> "sadly, my reading is confined to newspapers and business\nperi…
$ essay5         <chr> "my daughter. usa today. exercise. some travel. world news. ice…
$ essay6         <chr> "missing", "missing", "why i fill these blocks.<br />\nwhat i r…
$ essay7         <chr> "picking up my daughter for the weekend or going to the gym and…
$ essay8         <chr> "i can be a procrastinator but it is often driven by my need to…
$ essay9         <chr> "you are looking for an easy going, no drama, try anything once…
$ ethnicity      <chr> "white", "white", "other", "white", "white", "hispanic / latin,…
$ job            <chr> "sales / marketing / biz dev", "science / tech / engineering", …
$ last_online    <chr> "2012-06-29-22-37", "2012-06-30-12-50", "2012-06-30-18-21", "20…
$ location       <chr> "san carlos, california", "oakland, california", "san francisco…
$ offspring      <chr> "has a kid, but doesn&rsquo;t want more", "doesn&rsquo;t have k…
$ orientation    <chr> "straight", "straight", "straight", "straight", "gay", "gay", "…
$ pets           <chr> "likes dogs", "likes dogs", "likes dogs and likes cats", "has d…
$ religion       <chr> "other", "atheism but not too serious about it", "catholicism b…
$ sex            <chr> "m", "f", "m", "f", "m", "m", "f", "f", "f", "f", "f", "f", "m"…
$ sign           <chr> "scorpio and it&rsquo;s fun to think about", "sagittarius but i…
$ smokes         <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no…
$ speaks         <chr> "english", "english", "english (fluently)", "english", "english…
$ status         <chr> "single", "single", "single", "single", "single", "single", "si…
$ age            <dbl> 47, 27, 45, 40, 33, 27, 20, 28, 24, 34, 32, 23, 19, 45, 32, 38,…
$ income         <dbl> NaN, NaN, NaN, NaN, NaN, NaN, 20000, NaN, NaN, NaN, 70000, NaN,…
$ height         <dbl> 72, 63, 66, 62, 74, 71, 68, 66, 62, 67, 61, 59, 68, 68, 67, 73,…
$ not_working    <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ drinks_indexed <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 4, …
$ drinks_encoded <list> [<1, 0, 0, 0, 0, 0>, <1, 0, 0, 0, 0, 0>, <1, 0, 0, 0, 0, 0>, <…
$ drugs_indexed  <dbl> 0, 1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 2, 0, 2, 0, 0, 0, 1, 0, 1, 0, …
$ drugs_encoded  <list> [<1, 0, 0>, <0, 1, 0>, <1, 0, 0>, <1, 0, 0>, <1, 0, 0>, <0, 0,…
$ status_indexed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, …
$ status_encoded <list> [<1, 0, 0, 0>, <1, 0, 0, 0>, <1, 0, 0, 0>, <1, 0, 0, 0>, <1, 0…

Train Test splitting

# Seeded 70/30 random split, with each part stored as its own Spark table.
partitioned <- sdf_random_split(
  profiles,
  training = 0.7,
  testing = 0.3,
  seed = 42
)

training <- compute(partitioned$training, "training")

testing <- compute(partitioned$testing, "testing")

manual scaling

scale Age

Age - mean, sd

# Standardise age with the statistics held in `scaling_values`; `!!` inlines
# the R-side values into the expression before it is translated to SQL.
# NOTE(review): `scaling_values` is not defined in the visible part of the
# file — presumably the mean and sd of age on the training split; confirm.
training <- training %>%
  mutate(
    scaled_age = (age - !!scaling_values$mean_age) / !!scaling_values$sd_age
  ) %>%
  glimpse()
Rows: ??
Columns: 39
Database: spark_connection
$ body_type      <chr> "a little extra", "a little extra", "a little extra", "a little…
$ diet           <chr> "anything", "anything", "anything", "anything", "anything", "an…
$ drinks         <chr> "rarely", "socially", "socially", "socially", "socially", "soci…
$ drugs          <chr> "never", "never", "never", "never", "never", "never", "never", …
$ education      <chr> "graduated from masters program", "dropped out of college/unive…
$ essay0         <chr> "hello.... well this is my 1st time on an internet dating site.…
$ essay1         <chr> "working hard, so i can play harder!!!", "trying to find out wh…
$ essay2         <chr> "trying lots of new things :) making new friends.", "listening …
$ essay3         <chr> "that i'm easy going. non-high maintenance.", "my hair", "my cr…
$ essay4         <chr> "i haven't watched tv for about 2 yrs now.... well, except when…
$ essay5         <chr> "family, friends, food, water, shelter... is there a 6th i have…
$ essay6         <chr> "what's in my future.... the next challenge....", "about the fu…
$ essay7         <chr> "working, or trying to sleep b/c i might get called back to wor…
$ essay8         <chr> "hmmmm..... i can't stand piles of dirty laundry or dirty dishe…
$ essay9         <chr> "you meet at least some of the stated interests, or your are wi…
$ ethnicity      <chr> "asian", "hispanic / latin", "white", "white", "white", "hispan…
$ job            <chr> "medicine / health", "other", "computer / hardware / software",…
$ last_online    <chr> "2012-06-30-16-12", "2012-06-22-08-43", "2012-06-13-12-41", "20…
$ location       <chr> "oakland, california", "hayward, california", "emeryville, cali…
$ offspring      <chr> "doesn&rsquo;t have kids, and doesn&rsquo;t want any", "doesn&r…
$ orientation    <chr> "straight", "straight", "straight", "gay", "bisexual", "straigh…
$ pets           <chr> "likes dogs", "has dogs", "likes dogs and likes cats", "has dog…
$ religion       <chr> "missing", "catholicism but not too serious about it", "other a…
$ sex            <chr> "f", "m", "m", "m", "m", "m", "f", "m", "m", "m", "f", "m", "m"…
$ sign           <chr> "leo", "leo and it&rsquo;s fun to think about", "libra but it d…
$ smokes         <chr> "missing", "sometimes", "no", "no", "no", "no", "missing", "som…
$ speaks         <chr> "english", "english (fluently), spanish (okay)", "english (flue…
$ status         <chr> "single", "single", "single", "single", "available", "single", …
$ age            <dbl> 44, 25, 26, 32, 45, 32, 26, 20, 26, 22, 23, 28, 39, 59, 55, 45,…
$ income         <dbl> NaN, 2e+04, 3e+04, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,…
$ height         <dbl> 67, 71, 74, 68, 67, 67, 63, 68, 66, 75, 65, 69, 72, 65, 72, 72,…
$ not_working    <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ drinks_indexed <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, …
$ drinks_encoded <list> [<0, 1, 0, 0, 0, 0>, <1, 0, 0, 0, 0, 0>, <1, 0, 0, 0, 0, 0>, <…
$ drugs_indexed  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0, 3, 2, 1, 0, …
$ drugs_encoded  <list> [<1, 0, 0>, <1, 0, 0>, <1, 0, 0>, <1, 0, 0>, <1, 0, 0>, <1, 0,…
$ status_indexed <dbl> 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, …
$ status_encoded <list> [<1, 0, 0, 0>, <1, 0, 0, 0>, <1, 0, 0, 0>, <1, 0, 0, 0>, <0, 0…
$ scaled_age     <dbl> 1.27623744, -0.77314810, -0.66528570, -0.01811132, 1.38409983, …

Model: not_working ~ scaled_age, drinks, drugs, status

# Evaluate the fitted model and print the available summary accessors.
# NOTE(review): the model is scored on `training`, so despite the name
# `validation_info` these look like in-sample metrics — confirm this is
# intended. `logreg_model` is not defined in the visible part of the file.
validation_info <- ml_evaluate(logreg_model, training)
validation_info
BinaryLogisticRegressionSummaryImpl 
 Access the following via `$` or `ml_summary()`. 
 - features_col() 
 - label_col() 
 - predictions() 
 - probability_col() 
 - area_under_roc() 
 - f_measure_by_threshold() 
 - pr() 
 - precision_by_threshold() 
 - recall_by_threshold() 
 - roc() 
 - prediction_col() 
 - accuracy() 
 - f_measure_by_label() 
 - false_positive_rate_by_label() 
 - labels() 
 - precision_by_label() 
 - recall_by_label() 
 - true_positive_rate_by_label() 
 - weighted_f_measure() 
 - weighted_false_positive_rate() 
 - weighted_precision() 
 - weighted_recall() 
 - weighted_true_positive_rate() 
# Bring the ROC points (FPR, TPR) into R and preview them.
roc <- collect(validation_info$roc())
glimpse(roc)
Rows: 110
Columns: 2
$ FPR <dbl> 0.000000000, 0.002642008, 0.007265522, 0.011492734, 0.016776750, 0.0196829…
$ TPR <dbl> 0.00000000, 0.03005464, 0.07103825, 0.09699454, 0.15300546, 0.16530055, 0.…
# ROC curve with a dashed reference diagonal, drawn on equally scaled axes.
ggplot(data = roc, mapping = aes(FPR, TPR)) +
  geom_line() +
  geom_abline(linetype = "dashed") +
  coord_fixed()

LS0tCnRpdGxlOiAiU3BhcmsiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMgU2V0dGluZyB1cAoKYGBge3J9CmxpYnJhcnkoc3BhcmtseXIpCmBgYApgYGB7cn0Kc3BhcmtfaW5zdGFsbGVkX3ZlcnNpb25zKCkKYGBgCgpDcmVhdGUgc3BhcmsgY29ubmVjdGlvbjoKYGBge3J9CnNjIDwtIHNwYXJrX2Nvbm5lY3QobWFzdGVyID0gImxvY2FsIiwgdmVyc2lvbiA9ICIyLjQuNSIpCmBgYApgYGB7cn0Kc2MKYGBgCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKYGBgCmBgYHtyfQpsaWJyYXJ5KGphbml0b3IpCmF2b2NhZG8gPC0gcmVhZF9jc3YoImF2b2NhZG8uY3N2IikgJT4lCiAgY2xlYW5fbmFtZXMoKQpgYGAKCmBgYHtyfQphdm9jYWRvX3NwYXJrIDwtIGNvcHlfdG8oc2MsIGF2b2NhZG8pCmBgYAoKYGBge3J9CmF2b2NhZG9fc3BhcmsgPC0gc3BhcmtfcmVhZF9jc3Yoc2MsIHBhdGggPSAiYXZvY2Fkby5jc3YiKQpgYGAKCmBgYHtyfQphdm9jYWRvX3NwYXJrPC0gdGJsKHNjLCAiYXZvY2FkbyIpCmBgYAoKYGBge3J9CnNyY190YmxzKHNjKQpgYGAKCmBgYHtyfQpjbGFzcyhhdm9jYWRvX3NwYXJrKQpgYGAKCmBgYHtyfQpzdHIoYXZvY2Fkb19zcGFyaykKYGBgCgpgYGB7cn0KbGlicmFyeShwcnlyKQpgYGAKCmBgYHtyfQpvYmplY3Rfc2l6ZShhdm9jYWRvKQpgYGAKYGBge3J9Cm9iamVjdF9zaXplKGF2b2NhZG9fc3BhcmspCmBgYApgYGB7cn0Kc2VsZWN0ZWRfYXZvY2Fkb19zcGFyayA8LSBhdm9jYWRvX3NwYXJrICU+JQogIHNlbGVjdChBdmVyYWdlUHJpY2UpICU+JQogIHN1bW1hcmlzZShhdl9tZWFuID0gbWVhbihBdmVyYWdlUHJpY2UpKSAlPiUKICBjb2xsZWN0KCkgIyBjb2xsZWN0IG9uIGl0cyBvd24gd2l0aG91dCBzdW1tYXJpemluZyBpcyBub3QgYSBnb29kIGlkZWEsIGFzIHRoZXJlIGNvdWxkCiNiZSB0b28gbXVjaCBkYXRhCiAgCiAgCiNvYmplY3Rfc2l6ZShzZWxlY3RlZF9hdm9jYWRvX3NwYXJrKQpgYGAKYGBge3J9CmF2b2NhZG9fd29fcHJpY2UgPC0gYXZvY2Fkb19zcGFyayAlPiUKICBzZWxlY3QoLUF2ZXJhZ2VQcmljZSkgJT4lCiAgY29tcHV0ZSgiYXZvY2Fkb193b19wcmljZSIpCmBgYAoKYGBge3J9CmF2b2NhZG9fc3BhcmsgJT4lCiAgc2VsZWN0KEF2ZXJhZ2VQcmljZSkgJT4lCiAgc2hvd19xdWVyeSgpCmBgYAoKYGBge3J9CmF2b2NhZG9fc3BhcmsgPC0gYXZvY2Fkb19zcGFyayAlPiUKICBtdXRhdGUoaGlnaF9hdmVyYWdlID0gQXZlcmFnZVByaWNlID4gMS40MCkgJT4lCiAgc2hvd19xdWVyeSgpCmBgYAoKYGBge3J9CmF2b2NhZG9fc3BhcmsgJT4lCiAgc2VsZWN0KGhpZ2hfYXZlcmFnZSkgJT4lCiAgZ2xpbXBzZSgpICU+JQogIHNob3dfcXVlcnkoKQpgYGAKCmBgYHtyfQpyZXN1bHQgPC0gYXZvY2Fkb19zcGFyayAlPiUKICBtdXRhdGUobG93X2F2ZXJhZ2UgPSBBdmVyYWdlUHJpY2UgPCAxLjApICU+JQogIHNob3dfcXVlcnkoKQoKY29sbGVjdGVkX3Jlc3VsdCA8LSByZXN1bHQgJT4lCiAgY29sbGVjdCgpCgpj
bGFzcyhyZXN1bHQpCmNsYXNzKGNvbGxlY3RlZF9yZXN1bHQpCmBgYAoKU3Bhcmsgd2ViIGludGVyZmFjZQpgYGB7cn0Kc3Bhcmtfd2ViKHNjKQpgYGAKCgojIEFuYWx5c2lzIGluIFNwYXJrCgpgYGB7cn0KbGlicmFyeShzcGFya2x5cikKc2MgPC0gc3BhcmtfY29ubmVjdChtYXN0ZXIgPSAibG9jYWwiLCB2ZXJzaW9uID0gIjIuNC41IikKYGBgCgpgYGB7cn0KbGlicmFyeShkYnBsb3QpCmxpYnJhcnkoY2FyKQpgYGAKCmBgYHtyfQpwcmVzdGlnZSA8LSBjb3B5X3RvKHNjLCBQcmVzdGlnZSkKYGBgCgpgYGB7cn0KcHJlc3RpZ2UgJT4lCiAgc3VtbWFyaXNlX2lmKGlzLm51bWVyaWMsIG1lYW4pICU+JQogIHNob3dfcXVlcnkoKQpgYGAKCmBgYHtyfQpwcmVzdGlnZSAlPiUKICBzdW1tYXJpc2VfaWYoaXMubnVtZXJpYywgdmFyKSAlPiUKICBzaG93X3F1ZXJ5KCkKYGBgCgpgYGB7cn0KcHJlc3RpZ2UgJT4lCiAgbXV0YXRlKHNlY29uZGFyeV9lZHVjYXRlZCA9IGlmZWxzZShlZHVjYXRpb24gPiA3LCAiWWVzIiwgIk5vIikpICU+JQogIGdyb3VwX2J5KHNlY29uZGFyeV9lZHVjYXRlZCkgJT4lCiAgc3VtbWFyaXNlKG1lYW5faW5jb21lID0gbWVhbihpbmNvbWUpKSAlPiUKICBzaG93X3F1ZXJ5KCkKYGBgCgpgYGB7cn0KcHJlc3RpZ2UgJT4lCiAgc2VsZWN0KGluY29tZSwgZWR1Y2F0aW9uKSAlPiUKICBnbGltcHNlKCkKYGBgCgpgYGB7cn0KdHJ5Q2F0Y2goCiAge3ByZXN0aWdlWywgYygiZWR1Y2F0aW9uIiwgImluY29tZSIpXSAlPiUgZ2xpbXBzZSgpfSwgCiAgZXJyb3IgPSBwcmludAopCmBgYAoKYGBge3J9CnRyeUNhdGNoKAogIHtQcmVzdGlnZVssIGMoImVkdWNhdGlvbiIsICJpbmNvbWUiKV0gJT4lIGdsaW1wc2UoKX0sIAogIGVycm9yID0gcHJpbnQKKQpgYGAKClBhc3N0aHJvZ2gKCmBgYHtyfQpwcmVzdGlnZSAlPiUKICBzdW1tYXJpc2Uod29tZW5fcGVyY2VudGlsZSA9IHBlcmNlbnRpbGUod29tZW4sIGFycmF5KDAuMjUsIDAuNSwgMC43NSkpKSAlPiUKICBtdXRhdGUod29tZW5fcGVyY2VudGlsZSA9IGV4cGxvZGUod29tZW5fcGVyY2VudGlsZSkpICU+JQogIHNob3dfcXVlcnkoKQpgYGAKCiMgVmlzdWFsaXNhdGlvbgoKYGBge3J9CnByZXN0aWdlICU+JQogIGdyb3VwX2J5KHR5cGUpICU+JQogIHN1bW1hcmlzZShudW1iZXIgPSBuKCkpICU+JQogIGNvbGxlY3QoKSAlPiUKICBnZ3Bsb3QoYWVzKHggPSB0eXBlLCB5ID0gbnVtYmVyKSkgKwogIGdlb21fY29sKGZpbGwgPSAic3RlZWxibHVlIiwgYWxwaGEgPSAwLjcpCmBgYAoKUmFzdGVyIFBsb3QKYGBge3J9CmxpYnJhcnkoZGJwbG90KQoKcHJlc3RpZ2UgJT4lCiAgZGJwbG90X3Jhc3Rlcih4ID0gZWR1Y2F0aW9uLCB5ID0gaW5jb21lLCByZXNvbHV0aW9uID0gMjApCmBgYAoKIyBTcGFya2x5ciBuYXRpdmUgaW50ZXJmYWNlCgpgYGB7cn0Kc2NoZW1hIDwtIHNkZl9zY2hlbWEocHJlc3RpZ2UpCnNjaGVtYQpgYGAKCiMgTmF0aXZlIGZ1bmN0aW9ucwoKU0RGCnNkZl8uLi4gZnVuY3Rp
b25zIC0gU3BhcmsgRGF0YUZyYW1lCgpTb3J0aW5nCgpgYGB7cn0KcHJlc3RpZ2UgJT4lCiAgc2RmX3NvcnQoY29sdW1ucyA9ICJwcmVzdGlnZSIpICU+JQogIGhlYWQoMjApCmBgYAoKU2FtcGxpbmcKCmBgYHtyfQpkb3duc2FtcGxlZF9wcmVzdGlnZSA8LSBwcmVzdGlnZSAlPiUKICBzZGZfc2FtcGxlKGZyYWN0aW9uID0gMC4yLCByZXBsYWNlbWVudCA9IEZBTFNFLCBzZWVkID0gNDIpICU+JQogIGNvbXB1dGUoImRvd25zYW1wbGVkX3ByZXN0aWdlIikKYGBgCgpQYXJ0aXRpb25pbmcgKGUuZy4gdHJhaW4vdGVzdCBzcGxpdCkKCmBgYHtyfQpwYXJ0aXRpb25lZCA8LSBwcmVzdGlnZSAlPiUKICBzZGZfcmFuZG9tX3NwbGl0KHRyYWluaW5nID0gMC43LCB0ZXN0aW5nID0gMC4zKQoKdHJhaW5pbmcgPC0gcGFydGl0aW9uZWQkdHJhaW5pbmcgJT4lCiAgY29tcHV0ZSgidHJhaW5pbmciKQp0ZXN0aW5nIDwtIHBhcnRpdGlvbmVkJHRlc3RpbmcgJT4lCiAgY29tcHV0ZSgidGVzdGluZyIpCmBgYApgYGB7cn0KcHJlc3RpZ2UgJT4lCiAgY291bnQoKQpgYGAKYGBge3J9CnRyYWluaW5nICU+JQogIGNvdW50KCkKYGBgCmBgYHtyfQp0ZXN0aW5nICU+JQogIGNvdW50KCkKYGBgCgpCaW5kaW5nCgpgYGB7cn0KcmVhc3NlbWJsZWQgPC10cmFpbmluZyAlPiUKICBzZGZfYmluZF9yb3dzKHRlc3RpbmcpCmBgYAoKYGBge3J9CnJlYXNzZW1ibGVkICU+JQogIGNvdW50KCkKYGBgCgpgYGB7cn0KbHMoInBhY2thZ2U6c3BhcmtseXIiLCBwYXR0ZXJuID0gIl5mdCIgKQpgYGAKCgpNTGxpYgpmdF8uLi4gLSBGZWF0dXJlIFRyYW5zZm9ybWVycwptbF8uLi4gLSBNYWNoaW5lIExlYXJuaW5nCgojIFBhcnF1ZXQgZm9ybWF0IGRhdGEKCmBgYHtyfQpzcGFya193cml0ZV9wYXJxdWV0KHByZXN0aWdlLCAicHJlc3RpZ2VfZGF0YSIpCmBgYAoKYGBge3J9CnByZXN0aWdlX2FnYWluIDwtIHNwYXJrX3JlYWRfcGFycXVldChzYywgbmFtZSA9ICJwcmVzdGlnZV9hZ2FpbiIsIHBhdGggPSAicHJlc3RpZ2VfZGF0YSIpICU+JQogIGdsaW1wc2UoKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKYGBgCgpgYGB7cn0KcHJvZmlsZXMgPC0gc3BhcmtfcmVhZF9jc3Yoc2MsICJwcm9maWxlcy5jc3YiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgZXNjYXBlID0gIlwiIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lbW9yeSA9IEZBTFNFLAogICAgICAgICAgICAgICAgICAgICAgICAgICBvcHRpb25zID0gbGlzdChtdWx0aWxpbmUgPSBUUlVFKSkKYGBgCgpgYGB7cn0KZ2xpbXBzZShwcm9maWxlcykKYGBgCgpgYGB7cn0KcHJvZmlsZXMgJT4lCiAgc3VtbWFyaXNlX2FsbCguZnVucyA9IH5zdW0oYXMuaW50ZWdlcihpcy5uYSguKSkpKQpgYGAKCgpgYGB7cn0KcHJvZmlsZXMgJT4lCiAgY291bnQoKQpgYGAKYGBge3J9CnByb2ZpbGVzICU+JQogIHN1bW1hcmlzZShudW1fbmVnX2luY29tZSA9IHN1bShhcy5pbnRlZ2VyKGluY29tZSA8IDAgKSkpCmBgYAoK
YGBge3J9CnByb2ZpbGVzX2NoYXIgPC0gcHJvZmlsZXMgJT4lCiAgc2VsZWN0KC1jKGFnZSwgaW5jb21lLCBoZWlnaHQpKSAlPiUKICBtdXRhdGVfYWxsKH4gaWZlbHNlKGlzLm5hKC4pLCAibWlzc2luZyIsIC4pKQpgYGAKCmBgYHtyfQpwcm9maWxlc19udW0gPC0gcHJvZmlsZXMgJT4lCiAgc2VsZWN0KGFnZSwgaW5jb21lLCBoZWlnaHQpICU+JQogIG11dGF0ZSgKICAgIGFnZSA9IGFzLm51bWVyaWMoYWdlKSwKICAgIGluY29tZSA9IGlmZWxzZShpbmNvbWUgPT0gIi0xIiwgTkEsIGFzLm51bWVyaWMoaW5jb21lKSksCiAgICBoZWlnaHQgPSBhcy5udW1lcmljKGhlaWdodCkKICApCmBgYAoKYGBge3J9CnByb2ZpbGVzIDwtIHNkZl9iaW5kX2NvbHMocHJvZmlsZXNfY2hhciwgcHJvZmlsZXNfbnVtKSAlPiUKICBjb21wdXRlKCJwcm9maWxlcyIpCmBgYAoKYGBge3J9CnByb2ZpbGVzICU+JQogIHN1bW1hcmlzZV9hbGwoIH5zdW0oYXMuaW50ZWdlcihpcy5uYSguKSkpKQogIGdsaW1wc2UoKQpgYGAKCmBgYHtyfQpzY2hlbWEgPC0gc2RmX3NjaGVtYShwcm9maWxlcykKCnNjaGVtYSAlPiUKICB0cmFuc3Bvc2UoKSAlPiUKICBhc190aWJibGUoKSAlPiUKICB1bm5lc3QoY29scyA9IGMobmFtZSwgdHlwZSkpCmBgYAoKYGBge3J9CmNvbnRpbmdlbmN5IDwtIHByb2ZpbGVzICU+JQogIHNkZl9jcm9zc3RhYigiZHJpbmtzIiwgImRydWdzIikgJT4lCiAgY29sbGVjdCgpCgpjb250aW5nZW5jeSAlPiUKICByZW5hbWUoZHJpbmtzID0gZHJpbmtzX2RydWdzKSAlPiUKICBtdXRhdGUoCiAgICBkcmlua3MgPSBhc19mYWN0b3IoZHJpbmtzKSAlPiUKICAgICAgZmN0X3JlbGV2ZWwoIm1pc3NpbmciLCAibm90IGF0IGFsbCIsICJyYXJlbHkiLCAic29jaWFsbHkiLCAib2Z0ZW4iLCAidmVyeSBvZnRlbiIsICJkZXNwZXJhdGVseSIpCiAgKSAlPiUKICBhcnJhbmdlKGRyaW5rcykgJT4lCiAgc2VsZWN0KGRyaW5rcywgbWlzc2luZywgbmV2ZXIsIHNvbWV0aW1lcywgb2Z0ZW4pCmBgYAoKCk1hY2hpbmUgTGVhcm5pbmcgbW9kZWwKCmBgYHtyfQpwcm9maWxlcyA8LSBwcm9maWxlcyAlPiUKICBtdXRhdGUoCiAgICBub3Rfd29ya2luZyA9IGFzLmludGVnZXIoaWZlbHNlKGpvYiAlaW4lIGMoInN0dWRlbnQiLCAidW5lbXBsb3llZCIsICJyZXR1cmVkIiksMSwwKSkKICAgICkKCnByb2ZpbGVzICU+JQogIGNvdW50KG5vdF93b3JraW5nKQpgYGAKCmBgYHtyfQpzZGZfZGVzY3JpYmUocHJvZmlsZXMsIGNvbHMgPSBjKCJhZ2UiLCAiaGVpZ2h0IiwgImluY29tZSIsICJub3Rfd29ya2luZyIpKQpgYGAKCiMgRmVhdHVyZSBlbmdpbmVlcmluZwoKQ2F0ZWdvcmljYWwgZW5jb2RpbmcKZnRfc3RyaW5nX2luZGV4ZXIoKQpmdF9vbmVfaG90X2VuY29kZXIoKSB0byBtYWtlIGR1bW1pZXMKCmBgYHtyfQpwcm9maWxlcyA8LSBwcm9maWxlcyAlPiUKICBmdF9zdHJpbmdfaW5kZXhlcigKICAgIGlucHV0X2NvbCA9ICJkcmlua3MiLAogICAgb3V0cHV0X2NvbCA9ICJkcmlua3Nf
aW5kZXhlZCIKICApICU+JQogIGZ0X29uZV9ob3RfZW5jb2RlcigKICAgIGlucHV0X2NvbCA9ICJkcmlua3NfaW5kZXhlZCIsCiAgICBvdXRwdXRfY29sID0gImRyaW5rc19lbmNvZGVkIgogICkgJT4lCiAgZnRfc3RyaW5nX2luZGV4ZXIoCiAgICBpbnB1dF9jb2wgPSAiZHJ1Z3MiLAogICAgb3V0cHV0X2NvbCA9ICJkcnVnc19pbmRleGVkIgogICkgJT4lCiAgZnRfb25lX2hvdF9lbmNvZGVyKAogICAgaW5wdXRfY29sID0gImRydWdzX2luZGV4ZWQiLAogICAgb3V0cHV0X2NvbCA9ICJkcnVnc19lbmNvZGVkIgogICkgJT4lCiAgZnRfc3RyaW5nX2luZGV4ZXIoCiAgICBpbnB1dF9jb2wgPSAic3RhdHVzIiwKICAgIG91dHB1dF9jb2wgPSAic3RhdHVzX2luZGV4ZWQiCiAgKSAlPiUKICBmdF9vbmVfaG90X2VuY29kZXIoCiAgICBpbnB1dF9jb2wgPSAic3RhdHVzX2luZGV4ZWQiLAogICAgb3V0cHV0X2NvbCA9ICJzdGF0dXNfZW5jb2RlZCIKICApICU+JQogIGNvbXB1dGUoInByb2ZpbGVzIikKYGBgCgpgYGB7cn0KZ2xpbXBzZShwcm9maWxlcykKYGBgCgpUcmFpbiBUZXN0IHNwbGl0dGluZwoKYGBge3J9CnBhcnRpdGlvbmVkIDwtIHByb2ZpbGVzICU+JQogIHNkZl9yYW5kb21fc3BsaXQodHJhaW5pbmcgPSAwLjcsIHRlc3RpbmcgPSAwLjMsIHNlZWQgPSA0MikKCnRyYWluaW5nIDwtIHBhcnRpdGlvbmVkJHRyYWluaW5nICU+JQogIGNvbXB1dGUoInRyYWluaW5nIikKCnRlc3RpbmcgPC0gcGFydGl0aW9uZWQkdGVzdGluZyAlPiUKICBjb21wdXRlKCJ0ZXN0aW5nIikKYGBgCgojIG1hbnVhbCBzY2FsaW5nCgpzY2FsZSBBZ2UKCkFnZSAtIG1lYW4sIHNkCgpgYGB7cn0Kc2NhbGluZ192YWx1ZXMgPC0gdHJhaW5pbmcgJT4lCiAgc3VtbWFyaXNlKAogICAgbWVhbl9hZ2UgPSBtZWFuKGFnZSksCiAgICBzZF9hZ2UgID0gc2QoYWdlKQogICkgJT4lCiAgY29sbGVjdCgpCnNjYWxpbmdfdmFsdWVzCmBgYAoKYGBge3J9CnRyYWluaW5nIDwtIHRyYWluaW5nICU+JQogIG11dGF0ZSgKICAgIHNjYWxlZF9hZ2UgPSAoYWdlIC0gISFzY2FsaW5nX3ZhbHVlcyRtZWFuX2FnZSkgLyAhIXNjYWxpbmdfdmFsdWVzJHNkX2FnZQogICkgJT4lCiAgZ2xpbXBzZSgpCmBgYAoKTW9kZWw6IG5vdF93b3JraW5nIH4gc2NhbGVkX2FnZSwgZHJpbmtzLCBkcnVncywgc3RhdHVzCgpgYGB7cn0KdHJhaW5pbmcgPC0gdHJhaW5pbmcgJT4lCiAgZnRfdmVjdG9yX2Fzc2VtYmxlcigKICAgIGlucHV0X2NvbHMgPSBjKCJzY2FsZWRfYWdlIiwgImRyaW5rc19lbmNvZGVkIiwgImRydWdzX2VuY29kZWQiLCAic3RhdHVzX2VuY29kZWQiKSwKICAgIG91dHB1dF9jb2wgPSAiZmVhdHVyZXMiCiAgKQpgYGAKCmBgYHtyfQp0cmFpbmluZyAlPiUKICBjb2xsZWN0KCkKYGBgCgpgYGB7cn0KbG9ncmVnX21vZGVsIDwtIHRyYWluaW5nICU+JQogIG1sX2xvZ2lzdGljX3JlZ3Jlc3Npb24oCiAgICBsYWJlbF9jb2wgPSAibm90X3dvcmtpbmciLAogICAgZmVhdHVyZXNfY29sID0gImZlYXR1
cmVzIgogICkKYGBgCmBgYHtyfQp2YWxpZGF0aW9uX2luZm8gPC0gbWxfZXZhbHVhdGUobG9ncmVnX21vZGVsLCB0cmFpbmluZykKdmFsaWRhdGlvbl9pbmZvCmBgYAoKYGBge3J9CnJvYyA8LSB2YWxpZGF0aW9uX2luZm8kcm9jKCkgJT4lCiAgY29sbGVjdCgpICU+JQogIGdsaW1wc2UoKQpgYGAKCmBgYHtyfQpnZ3Bsb3Qocm9jLCBhZXMoeCA9IEZQUiwgeSA9IFRQUikpICsKICBnZW9tX2xpbmUoKSArCiAgZ2VvbV9hYmxpbmUobHR5ID0gImRhc2hlZCIpICsKICBjb29yZF9maXhlZCgpCmBgYAoK